Loading the libraries that are needed to load and perform queries on the dataset.
library(DataComputing)
library(tidyverse)
library(rvest)
library(lubridate)
library(dplyr)
library(readr)
library(ggplot2)
library(data.table)
library(dataPreparation)
Load Data
Load the tables into RStudio so they can be used for evaluation. Each file is picked interactively with `file.choose()` and read with `data.table::fread()`.
# Sample submission: pick the CSV interactively, then read it with fread().
file_name <- file.choose()
sampleSubmission <- data.table::fread(input = file_name)
# Auto-print the table to inspect what was read.
sampleSubmission
# Training data: pick the CSV interactively, then read it with fread().
file_name <- file.choose()
TrainData <- data.table::fread(input = file_name)
Some columns are type 'integer64' but package bit64 is not installed. Those columns will print as strange looking floating point data. There is no need to reload the data. Simply install.packages('bit64') to obtain the integer64 print method and print the data again.
# Auto-print the training table to inspect what was read.
TrainData
# Test data: pick the CSV interactively, then read it with fread().
file_name <- file.choose()
TestData <- data.table::fread(input = file_name)
Some columns are type 'integer64' but package bit64 is not installed. Those columns will print as strange looking floating point data. There is no need to reload the data. Simply install.packages('bit64') to obtain the integer64 print method and print the data again.
# Auto-print the test table to inspect what was read.
TestData
Data Wrangling
# Give the training columns space-free, syntactic names so later steps can
# refer to them without backticks. All pairs are `new_name = old_name`.
# NOTE(review): `Net Debt` is renamed to TotalNonCurrentAssests, which looks
# like a mislabel; the existing output names are kept unchanged here -- confirm.
CleanedTrain <- TrainData %>%
  rename(
    RevenueGrowth          = `Revenue Growth`,
    CostRevenue            = `Cost of Revenue`,
    GrossProfit            = `Gross Profit`,
    RDExpenses             = `R&D Expenses`,
    SGAExpense             = `SG&A Expense`,
    OpExpenses             = `Operating Expenses`,
    OpIncome               = `Operating Income`,
    InterestExpense        = `Interest Expense`,
    IncomeTaxExpense       = `Income Tax Expense`,
    NetIncome              = `Net Income`,
    ProfitMargin           = `Profit Margin`,
    NetProfitMargin        = `Net Profit Margin`,
    TotalCurrentAssets     = `Total current assets`,
    TotalAssets            = `Total assets`,
    TotalDebt              = `Total debt`,
    TaxLiability           = `Tax Liabilities`,
    TotalNonCurrentAssests = `Net Debt`,
    InvestementPurchSales  = `Investment purchases and sales`,
    AssetGrowth            = `Asset Growth`,
    DebtGrowth             = `Debt Growth`
  )
# Auto-print the renamed training table.
CleanedTrain
NA
# Apply the same column renames to the test data so both tables share one
# set of space-free names. All pairs are `new_name = old_name`.
# NOTE(review): `Net Debt` is renamed to TotalNonCurrentAssests, which looks
# like a mislabel; the existing output names are kept unchanged here -- confirm.
CleanedTest <- TestData %>%
  rename(
    RevenueGrowth          = `Revenue Growth`,
    CostRevenue            = `Cost of Revenue`,
    GrossProfit            = `Gross Profit`,
    RDExpenses             = `R&D Expenses`,
    SGAExpense             = `SG&A Expense`,
    OpExpenses             = `Operating Expenses`,
    OpIncome               = `Operating Income`,
    InterestExpense        = `Interest Expense`,
    IncomeTaxExpense       = `Income Tax Expense`,
    NetIncome              = `Net Income`,
    ProfitMargin           = `Profit Margin`,
    NetProfitMargin        = `Net Profit Margin`,
    TotalCurrentAssets     = `Total current assets`,
    TotalAssets            = `Total assets`,
    TotalDebt              = `Total debt`,
    TaxLiability           = `Tax Liabilities`,
    TotalNonCurrentAssests = `Net Debt`,
    InvestementPurchSales  = `Investment purchases and sales`,
    AssetGrowth            = `Asset Growth`,
    DebtGrowth             = `Debt Growth`
  )
# Auto-print the renamed test table.
CleanedTest
Select only the variables needed for the analysis
# Keep only the company name and the eight metrics used in the analysis.
NewTrain <- select(
  CleanedTrain,
  Name,
  RevenueGrowth,
  GrossProfit,
  NetIncome,
  ProfitMargin,
  NetProfitMargin,
  TotalDebt,
  AssetGrowth,
  DebtGrowth
)
# Auto-print the reduced training table.
NewTrain
# Keep the same nine columns from the test data as were kept for training.
NewTest <- select(
  CleanedTest,
  Name,
  RevenueGrowth,
  GrossProfit,
  NetIncome,
  ProfitMargin,
  NetProfitMargin,
  TotalDebt,
  AssetGrowth,
  DebtGrowth
)
# Auto-print the reduced test table.
NewTest
Create a class column that is TRUE if RevenueGrowth > DebtGrowth and FALSE otherwise
# Label each training row: class is TRUE when revenue growth exceeds debt
# growth, FALSE otherwise (NA if either value is missing). transmute() keeps
# only the two columns listed.
ClassTableTrain <- transmute(
  NewTrain,
  Name = Name,
  class = RevenueGrowth > DebtGrowth
)
# Auto-print the training class table.
ClassTableTrain
# Label each test row: class is TRUE when revenue growth exceeds debt growth,
# FALSE otherwise (NA if either value is missing). transmute() keeps only the
# two columns listed.
# Fix: this table was previously built from NewTrain, which made it a
# duplicate of ClassTableTrain; it must be derived from the test data.
ClassTableTest <-
  NewTest %>%
  transmute(Name = Name, class = RevenueGrowth > DebtGrowth)
# Auto-print the test class table.
ClassTableTest
# Write the submission file to the working directory.
# NOTE(review): sampleSubmission is written exactly as it was loaded; the
# class tables computed above are never written out -- confirm this is intended.
fwrite(x = sampleSubmission, file = "submit.csv")
75d9fe2ea4d5fbf73bd684d0d2ed52713b8f8857
LS0tCnRpdGxlOiAiUHJvamVjdCBQaGFzZSAyIiAKc3VidGl0bGU6ICJEUzMxMCIKYXV0aG9yOiAiQXJ3YSBIYXJhcndhbGEsIEtyaXRoaWthIFNlbnRoaWwsICYgU3dhcmFsaSBLb3JnYW9ua2FyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpJbnN0YWxsaW5nIHRoZSBsaWJyYXJpZXMgdGhhdCBhcmUgbmVlZGVkIHRvIGJlIGFibGUgdG8gbG9hZCBhbmQgcHJlZm9ybSBxdWVyaWVzIG9uIHRoZSBkYXRhc2V0LgpgYGB7cn0KbGlicmFyeShEYXRhQ29tcHV0aW5nKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShydmVzdCkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkocmVhZHIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShkYXRhLnRhYmxlKQpsaWJyYXJ5KGRhdGFQcmVwYXJhdGlvbikKYGBgCgoKIyMjIExvYWQgRGF0YQpHZXR0aW5nIHRoZSB0YWJsZXMgaW50byBSU3R1ZGlvIHRvIGJlIGFibGUgdG8gdXNlIHRoZW0gZm9yIGV2YWx1YXRpb24uIFdlIHdpbGwgZG8gdGhpcyB1c2luZyB0aGUgcmVhZCBmdW5jdGlvbnMuIApgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKc2FtcGxlU3VibWlzc2lvbiA8LSBkYXRhLnRhYmxlOjpmcmVhZChmaWxlX25hbWUpCnNhbXBsZVN1Ym1pc3Npb24KYGBgCgpgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKVHJhaW5EYXRhIDwtIGRhdGEudGFibGU6OmZyZWFkKGZpbGVfbmFtZSkKVHJhaW5EYXRhCmBgYAoKCgpgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKVGVzdERhdGEgPC0gZGF0YS50YWJsZTo6ZnJlYWQoZmlsZV9uYW1lKQpUZXN0RGF0YQpgYGAKCgojIyMgRGF0YSBXcmFuZ2xpbmcKCmBgYHtyfQoKQ2xlYW5lZFRyYWluIDwtCiAgVHJhaW5EYXRhICU+JQogIHJlbmFtZShSZXZlbnVlR3Jvd3RoID0gYFJldmVudWUgR3Jvd3RoYCkgJT4lCiAgcmVuYW1lKENvc3RSZXZlbnVlID0gYENvc3Qgb2YgUmV2ZW51ZWApICU+JQogIHJlbmFtZShHcm9zc1Byb2ZpdCA9IGBHcm9zcyBQcm9maXRgKSAlPiUKICByZW5hbWUoUkRFeHBlbnNlcyA9IGBSJkQgRXhwZW5zZXNgKSAlPiUKICByZW5hbWUoU0dBRXhwZW5zZT0gYFNHJkEgRXhwZW5zZWApICU+JQogIHJlbmFtZShPcEV4cGVuc2VzID0gYE9wZXJhdGluZyBFeHBlbnNlc2ApICU+JQogIHJlbmFtZShPcEluY29tZSA9IGBPcGVyYXRpbmcgSW5jb21lYCkgJT4lCiAgcmVuYW1lKEludGVyZXN0RXhwZW5zZSA9IGBJbnRlcmVzdCBFeHBlbnNlYCkgJT4lCiAgcmVuYW1lKEluY29tZVRheEV4cGVuc2UgPSBgSW5jb21lIFRheCBFeHBlbnNlYCkgJT4lCiAgcmVuYW1lKE5ldEluY29tZT0gYE5ldCBJbmNvbWVgKSAlPiUKICByZW5hbWUoUHJvZml0TWFyZ2luID0gYFByb2ZpdCBNYXJnaW5gKSAlPiUKICByZW5hbWUoTmV0UHJvZml0TWFyZ2luPSBgTmV0IFByb2ZpdCBNYXJnaW5gKSAlPiUKICByZW5hbWUoVG90YWxDdXJyZW50QXNzZXRzID0gYFRvdGFsIGN1cnJlbnQgYXNzZXRzYCkgJT4l
CiAgcmVuYW1lKFRvdGFsQXNzZXRzID0gYFRvdGFsIGFzc2V0c2ApICU+JQogIHJlbmFtZShUb3RhbERlYnQgPSBgVG90YWwgZGVidGApICU+JQogIHJlbmFtZShUYXhMaWFiaWxpdHkgPSBgVGF4IExpYWJpbGl0aWVzYCkgJT4lCiAgcmVuYW1lKFRvdGFsTm9uQ3VycmVudEFzc2VzdHMgPSBgTmV0IERlYnRgKSAlPiUKICByZW5hbWUoSW52ZXN0ZW1lbnRQdXJjaFNhbGVzID0gYEludmVzdG1lbnQgcHVyY2hhc2VzIGFuZCBzYWxlc2ApICU+JQogIHJlbmFtZShBc3NldEdyb3d0aCA9IGBBc3NldCBHcm93dGhgKSAlPiUKICByZW5hbWUoRGVidEdyb3d0aCA9IGBEZWJ0IEdyb3d0aGApIApDbGVhbmVkVHJhaW4KCmBgYAoKYGBge3J9CkNsZWFuZWRUZXN0IDwtCiAgVGVzdERhdGEgJT4lCiAgcmVuYW1lKFJldmVudWVHcm93dGggPSBgUmV2ZW51ZSBHcm93dGhgKSAlPiUKICByZW5hbWUoQ29zdFJldmVudWUgPSBgQ29zdCBvZiBSZXZlbnVlYCkgJT4lCiAgcmVuYW1lKEdyb3NzUHJvZml0ID0gYEdyb3NzIFByb2ZpdGApICU+JQogIHJlbmFtZShSREV4cGVuc2VzID0gYFImRCBFeHBlbnNlc2ApICU+JQogIHJlbmFtZShTR0FFeHBlbnNlPSBgU0cmQSBFeHBlbnNlYCkgJT4lCiAgcmVuYW1lKE9wRXhwZW5zZXMgPSBgT3BlcmF0aW5nIEV4cGVuc2VzYCkgJT4lCiAgcmVuYW1lKE9wSW5jb21lID0gYE9wZXJhdGluZyBJbmNvbWVgKSAlPiUKICByZW5hbWUoSW50ZXJlc3RFeHBlbnNlID0gYEludGVyZXN0IEV4cGVuc2VgKSAlPiUKICByZW5hbWUoSW5jb21lVGF4RXhwZW5zZSA9IGBJbmNvbWUgVGF4IEV4cGVuc2VgKSAlPiUKICByZW5hbWUoTmV0SW5jb21lPSBgTmV0IEluY29tZWApICU+JQogIHJlbmFtZShQcm9maXRNYXJnaW4gPSBgUHJvZml0IE1hcmdpbmApICU+JQogIHJlbmFtZShOZXRQcm9maXRNYXJnaW49IGBOZXQgUHJvZml0IE1hcmdpbmApICU+JQogIHJlbmFtZShUb3RhbEN1cnJlbnRBc3NldHMgPSBgVG90YWwgY3VycmVudCBhc3NldHNgKSAlPiUKICByZW5hbWUoVG90YWxBc3NldHMgPSBgVG90YWwgYXNzZXRzYCkgJT4lCiAgcmVuYW1lKFRvdGFsRGVidCA9IGBUb3RhbCBkZWJ0YCkgJT4lCiAgcmVuYW1lKFRheExpYWJpbGl0eSA9IGBUYXggTGlhYmlsaXRpZXNgKSAlPiUKICByZW5hbWUoVG90YWxOb25DdXJyZW50QXNzZXN0cyA9IGBOZXQgRGVidGApICU+JQogIHJlbmFtZShJbnZlc3RlbWVudFB1cmNoU2FsZXMgPSBgSW52ZXN0bWVudCBwdXJjaGFzZXMgYW5kIHNhbGVzYCkgJT4lCiAgcmVuYW1lKEFzc2V0R3Jvd3RoID0gYEFzc2V0IEdyb3d0aGApICU+JQogIHJlbmFtZShEZWJ0R3Jvd3RoID0gYERlYnQgR3Jvd3RoYCkgCkNsZWFuZWRUZXN0CmBgYAoKIyMjIEZpbHRlciBvdXQgdGhlIGRhdGEgdG8gb25seSBoYXZlIHNlbGVjdCB2YXJpYWJsZXMgZm9yIGFuYWx5c2lzCmBgYHtyfQpOZXdUcmFpbiA8LQogIENsZWFuZWRUcmFpbiAlPiUKICBzZWxlY3QoTmFtZSwgUmV2ZW51ZUdyb3d0aCxHcm9zc1Byb2ZpdCxOZXRJbmNvbWUs
IFByb2ZpdE1hcmdpbiwgTmV0UHJvZml0TWFyZ2luLCBUb3RhbERlYnQsIEFzc2V0R3Jvd3RoLCBEZWJ0R3Jvd3RoICkKCk5ld1RyYWluCmBgYAoKYGBge3J9Ck5ld1Rlc3QgPC0KICBDbGVhbmVkVGVzdCU+JQogIHNlbGVjdChOYW1lLCBSZXZlbnVlR3Jvd3RoLEdyb3NzUHJvZml0LE5ldEluY29tZSwgUHJvZml0TWFyZ2luLCBOZXRQcm9maXRNYXJnaW4sIFRvdGFsRGVidCwgQXNzZXRHcm93dGgsIERlYnRHcm93dGggKQoKCk5ld1Rlc3QKYGBgCgoKIyMjIENyZWF0ZSBhIENsYXNzIENvbHVtbiB0aGF0IFJldHVybiBUcnVlIGlmIFJldmVudWVHcm93dGggPiBEZWJ0R3Jvd3RoIGFuZCBGYWxzZSBPdGhlcndpc2UKYGBge3J9CkNsYXNzVGFibGVUcmFpbiA8LQogIE5ld1RyYWluICU+JQogIHRyYW5zbXV0ZShOYW1lID0gTmFtZSwgY2xhc3MgPSBSZXZlbnVlR3Jvd3RoID4gRGVidEdyb3d0aCkKQ2xhc3NUYWJsZVRyYWluCmBgYAoKCmBgYHtyfQpDbGFzc1RhYmxlVGVzdCA8LQogIE5ld1RyYWluICU+JQogIHRyYW5zbXV0ZShOYW1lID0gTmFtZSwgY2xhc3MgPSBSZXZlbnVlR3Jvd3RoID4gRGVidEdyb3d0aCkKQ2xhc3NUYWJsZVRlc3QKYGBgCgoKCgpgYGB7cn0KZndyaXRlKHNhbXBsZVN1Ym1pc3Npb24sICJzdWJtaXQuY3N2IikKYGBgCj4+Pj4+Pj4gNzVkOWZlMmVhNGQ1ZmJmNzNiZDY4NGQwZDJlZDUyNzEzYjhmODg1NwoK